Create the design matrix
### Step 2: Create a DGEList object
dge <- DGEList(counts = cts_filtered)
# Optional: TMM normalization (especially if library sizes vary)
dge <- calcNormFactors(dge)
### Step 3: Create the design matrix
# design <- model.matrix(~ 0 + met_site_group, data = meta_data)
# colnames(design) <- levels(meta_data$met_site_group)
design <- model.matrix(~ 0 + combined_class, data = meta_data)
# colnames(design) <- levels(meta_data$combined_class)
# unique(meta_data$combined_class)
### Step 4: Create contrast matrix
# contrast.matrix <- makeContrasts(
# ## primary as normative
# primary_vs_liver = liver_tropic - primary,
# primary_vs_lung_tropic = lung_tropic - primary,
# primary_vs_aggressive_metastatic = aggressive_metastatic - primary,
# primary_vs_broadly_metastatic = broadly_metastatic - primary,
# primary_vs_weak_metastatic = weak_metastatic - primary,
# primary_vs_non_metastatic = non_metastatic - primary,
# primary_vs_liver_and_lung_metastatic = liver_and_lung_metastatic - primary,
# ## skin as normative
# # skin_vs_lung = lung - skin,
# # skin_vs_liver = liver - skin,
# # skin_vs_lymph_node = lymph_node - skin,
# # skin_vs_pleural_effusion = pleural_effusion - skin,
# # skin_vs_central_nervous_system = central_nervous_system - skin,
# # skin_vs_ascites = ascites - skin,
# # skin_vs_small_intestine = small_intestine - skin,
# # ## lymph_node as normative
# # lymph_node_vs_lung = lung - lymph_node,
# # lymph_node_vs_liver = liver - lymph_node,
# # lymph_node_vs_lymph_node = lymph_node - lymph_node,
# # lymph_node_vs_pleural_effusion = pleural_effusion - lymph_node,
# # lymph_node_vs_central_nervous_system = central_nervous_system - lymph_node,
# # lymph_node_vs_ascites = ascites - lymph_node,
# # lymph_node_vs_small_intestine = small_intestine - lymph_node,
# levels = design)
contrast.matrix <- makeContrasts(
primary_vs_liver_tropic = combined_classliver_tropic - combined_classprimary,
primary_vs_lung_tropic = combined_classlung_tropic - combined_classprimary,
primary_vs_aggressive_metastatic = combined_classaggressive_metastatic - combined_classprimary,
primary_vs_broadly_metastatic = combined_classbroadly_metastatic - combined_classprimary,
primary_vs_weak_metastatic = combined_classweak_metastatic - combined_classprimary,
primary_vs_non_metastatic = combined_classnon_metastatic - combined_classprimary,
primary_vs_liver_and_lung_metastatic = combined_classliver_and_lung_metastatic - combined_classprimary,
levels = design
)
Annotate DE Results
We annotate the DE genes with Ensembl and Entrez IDs from
Biomart.
getBM(attributes = c("ensembl_gene_id", "entrezgene_id", "external_gene_name"),
mart = useDataset("hsapiens_gene_ensembl", useMart("ensembl"))) %>%
dplyr::rename(ensembl_id = ensembl_gene_id,
entrez_id = entrezgene_id,
gene_name = external_gene_name) %>%
dplyr::filter(stringr::str_length(gene_name) > 1,
!duplicated(ensembl_id),
!duplicated(gene_name)) %>%
saveRDS(file = "./data/human_biomart.rds")
## read the table that maps gene symbols to entrex and ensembl ids
human_biomart <- readRDS(file = "./data/human_biomart.rds")
# Extract top differentially expressed genes
topTable(fit2, coef = "primary_vs_liver_tropic", adjust.method = "BH",
number = nrow(cts_filtered)) %>%
tibble::rownames_to_column(var = "gene_name") %>%
dplyr::left_join(human_biomart, by = "gene_name") %>%
# dplyr::left_join(
# cts_filtered %>%
# tibble::rownames_to_column(var = "gene_name") %>%
# dplyr::rowwise() %>%
# dplyr::mutate(
# skin_mean_exp = mean(c_across(
# meta_data %>%
# dplyr::filter(combined_class == "liver") %>%
# tibble::rownames_to_column(var = "depmap_id") %>%
# dplyr::pull(depmap_id))),
# primary_mean_exp = mean(c_across(
# meta_data %>%
# dplyr::filter(combined_class == "primary") %>%
# tibble::rownames_to_column(var = "depmap_id") %>%
# dplyr::pull(depmap_id)))) %>%
# dplyr::ungroup() %>%
# dplyr::select(gene_name, contains("exp")),
# by = "gene_name") %>%
dplyr::select(gene_name, contains("id"), everything()) -> liver_de
topTable(fit2, coef = "primary_vs_lung_tropic", adjust.method = "BH",
number = nrow(cts_filtered)) %>%
tibble::rownames_to_column(var = "gene_name") %>%
dplyr::left_join(human_biomart, by = "gene_name") %>%
# dplyr::left_join(
# cts_filtered %>%
# tibble::rownames_to_column(var = "gene_name") %>%
# dplyr::rowwise() %>%
# dplyr::mutate(
# skin_mean_exp = mean(c_across(
# meta_data %>%
# dplyr::filter(combined_class == "lung") %>%
# tibble::rownames_to_column(var = "depmap_id") %>%
# dplyr::pull(depmap_id))),
# primary_mean_exp = mean(c_across(
# meta_data %>%
# dplyr::filter(combined_class == "primary") %>%
# tibble::rownames_to_column(var = "depmap_id") %>%
# dplyr::pull(depmap_id)))) %>%
# dplyr::ungroup() %>%
# dplyr::select(gene_name, contains("exp")),
# by = "gene_name") %>%
dplyr::select(gene_name, contains("id"), everything()) -> lung_de
topTable(fit2, coef = "primary_vs_aggressive_metastatic", adjust.method = "BH",
number = nrow(cts_filtered)) %>%
tibble::rownames_to_column(var = "gene_name") %>%
dplyr::left_join(human_biomart, by = "gene_name") %>%
# dplyr::left_join(
# cts_filtered %>%
# tibble::rownames_to_column(var = "gene_name") %>%
# dplyr::rowwise() %>%
# dplyr::mutate(
# skin_mean_exp = mean(c_across(
# meta_data %>%
# dplyr::filter(combined_class == "aggressive_metastatic") %>%
# tibble::rownames_to_column(var = "depmap_id") %>%
# dplyr::pull(depmap_id))),
# primary_mean_exp = mean(c_across(
# meta_data %>%
# dplyr::filter(combined_class == "primary") %>%
# tibble::rownames_to_column(var = "depmap_id") %>%
# dplyr::pull(depmap_id)))) %>%
# dplyr::ungroup() %>%
# dplyr::select(gene_name, contains("exp")),
# by = "gene_name") %>%
dplyr::select(gene_name, contains("id"), everything()) -> aggressive_metastatic_de
topTable(fit2, coef = "primary_vs_broadly_metastatic", adjust.method = "BH",
number = nrow(cts_filtered)) %>%
tibble::rownames_to_column(var = "gene_name") %>%
dplyr::left_join(human_biomart, by = "gene_name") %>%
# dplyr::left_join(
# cts_filtered %>%
# tibble::rownames_to_column(var = "gene_name") %>%
# dplyr::rowwise() %>%
# dplyr::mutate(
# skin_mean_exp = mean(c_across(
# meta_data %>%
# dplyr::filter(combined_class == "broadly_metastatic") %>%
# tibble::rownames_to_column(var = "depmap_id") %>%
# dplyr::pull(depmap_id))),
# primary_mean_exp = mean(c_across(
# meta_data %>%
# dplyr::filter(combined_class == "primary") %>%
# tibble::rownames_to_column(var = "depmap_id") %>%
# dplyr::pull(depmap_id)))) %>%
# dplyr::ungroup() %>%
# dplyr::select(gene_name, contains("exp")),
# by = "gene_name") %>%
dplyr::select(gene_name, contains("id"), everything()) -> broadly_metastatic_de
topTable(fit2, coef = "primary_vs_weak_metastatic", adjust.method = "BH",
number = nrow(cts_filtered)) %>%
tibble::rownames_to_column(var = "gene_name") %>%
dplyr::left_join(human_biomart, by = "gene_name") %>%
# dplyr::left_join(
# cts_filtered %>%
# tibble::rownames_to_column(var = "gene_name") %>%
# dplyr::rowwise() %>%
# dplyr::mutate(
# skin_mean_exp = mean(c_across(
# meta_data %>%
# dplyr::filter(combined_class == "weak_metastatic") %>%
# tibble::rownames_to_column(var = "depmap_id") %>%
# dplyr::pull(depmap_id))),
# primary_mean_exp = mean(c_across(
# meta_data %>%
# dplyr::filter(combined_class == "primary") %>%
# tibble::rownames_to_column(var = "depmap_id") %>%
# dplyr::pull(depmap_id)))) %>%
# dplyr::ungroup() %>%
# dplyr::select(gene_name, contains("exp")),
# by = "gene_name") %>%
dplyr::select(gene_name, contains("id"), everything()) -> weak_metastatic_de
topTable(fit2, coef = "primary_vs_non_metastatic", adjust.method = "BH",
number = nrow(cts_filtered)) %>%
tibble::rownames_to_column(var = "gene_name") %>%
dplyr::left_join(human_biomart, by = "gene_name") %>%
# dplyr::left_join(
# cts_filtered %>%
# tibble::rownames_to_column(var = "gene_name") %>%
# dplyr::rowwise() %>%
# dplyr::mutate(
# skin_mean_exp = mean(c_across(
# meta_data %>%
# dplyr::filter(combined_class == "non_metastatic") %>%
# tibble::rownames_to_column(var = "depmap_id") %>%
# dplyr::pull(depmap_id))),
# primary_mean_exp = mean(c_across(
# meta_data %>%
# dplyr::filter(combined_class == "primary") %>%
# tibble::rownames_to_column(var = "depmap_id") %>%
# dplyr::pull(depmap_id)))) %>%
# dplyr::ungroup() %>%
# dplyr::select(gene_name, contains("exp")),
# by = "gene_name") %>%
dplyr::select(gene_name, contains("id"), everything()) -> non_metastatic_de
topTable(fit2, coef = "primary_vs_liver_and_lung_metastatic", adjust.method = "BH",
number = nrow(cts_filtered)) %>%
tibble::rownames_to_column(var = "gene_name") %>%
dplyr::left_join(human_biomart, by = "gene_name") %>%
# dplyr::left_join(
# cts_filtered %>%
# tibble::rownames_to_column(var = "gene_name") %>%
# dplyr::rowwise() %>%
# dplyr::mutate(
# skin_mean_exp = mean(c_across(
# meta_data %>%
# dplyr::filter(combined_class == "liver_and_lung_metastatic") %>%
# tibble::rownames_to_column(var = "depmap_id") %>%
# dplyr::pull(depmap_id))),
# primary_mean_exp = mean(c_across(
# meta_data %>%
# dplyr::filter(combined_class == "primary") %>%
# tibble::rownames_to_column(var = "depmap_id") %>%
# dplyr::pull(depmap_id)))) %>%
# dplyr::ungroup() %>%
# dplyr::select(gene_name, contains("exp")),
# by = "gene_name") %>%
dplyr::select(gene_name, contains("id"), everything()) -> liver_and_lung_metastatic_de
# topTable(fit2, coef = "primary_vs_small_intestine", adjust.method = "BH",
# number = nrow(cts_filtered)) %>%
# tibble::rownames_to_column(var = "gene_name") %>%
# dplyr::left_join(human_biomart, by = "gene_name") %>%
# dplyr::left_join(
# cts_filtered %>%
# tibble::rownames_to_column(var = "gene_name") %>%
# dplyr::rowwise() %>%
# dplyr::mutate(
# skin_mean_exp = mean(c_across(
# meta_data %>%
# dplyr::filter(combined_class == "small_intestine") %>%
# tibble::rownames_to_column(var = "depmap_id") %>%
# dplyr::pull(depmap_id))),
# primary_mean_exp = mean(c_across(
# meta_data %>%
# dplyr::filter(combined_class == "primary") %>%
# tibble::rownames_to_column(var = "depmap_id") %>%
# dplyr::pull(depmap_id)))) %>%
# dplyr::ungroup() %>%
# dplyr::select(gene_name, contains("exp")),
# by = "gene_name") %>%
# dplyr::select(gene_name, contains("id"), everything()) -> si_de
Results
A Volcano plot (log2 fold change vs -log10 p-value) helps identify
genes that display large magnitude changes and high significance.